from bs4 import BeautifulSoup
from urllib.parse import urljoin
import requests
import csv


# Listing-page URL for the target estate on ganji.com.
# NOTE(review): this string contains NO {page}/{price} placeholders, so the
# .format(page=..., price=...) calls below are no-ops and every iteration
# fetches the same URL — TODO: add the placeholders (e.g. a '/o{page}/'
# segment) so pagination actually takes effect.
URL = 'http://xa.ganji.com/fang1/_%E9%87%91%E4%B8%9A%E8%A7%82%E6%B9%96%E5%A4%A7%E7%AC%AC/'
ADDR = 'http://xa.ganji.com'  # site root, used to resolve relative listing links

if __name__ == '__main__':
    start_page = 1  # first page to scrape (inclusive)
    end_page = 10   # last page to scrape (inclusive)
    price = 7       # price filter fed to the URL template
    print('start==================>')
    while start_page <= end_page:
        page_url = URL.format(page=start_page, price=price)
        print('get:{0}'.format(page_url))
        # timeout so a hung server cannot stall the scraper forever;
        # raise_for_status so an HTTP error page is not silently parsed.
        res = requests.get(page_url, timeout=10)
        res.raise_for_status()
        html = BeautifulSoup(res.text, 'html.parser')  # first arg: HTML text, second: parser to use
        # One wrapper node per listing on the page.
        house_list = html.select('.f-list > .f-list-item > .f-list-item-wrap')
        if not house_list:  # no listings left -> stop early
            break
        for house in house_list:
            title_link = house.select('.title > a')[0]  # single lookup, reused for title and href
            house_title = title_link.text
            house_addr = house.select('.address > .area > a')[-1].text
            house_price = house.select('.info > .price > .num')[0].text
            house_url = urljoin(ADDR, title_link['href'])
            print(house_title, house_addr, house_price, house_url)
        # Advance AFTER the fetch: the original incremented before requesting,
        # which skipped page 1 and requested page end_page + 1 (off-by-one).
        start_page += 1
    print('end===================.')